package com.hyj.jpdy.grap.html;



import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;

/**
 * Command-line demo that downloads two pages with jsoup and prints selected
 * parts of them to standard output.
 *
 * <p>First the links inside the element with id {@code syncad_0} on the portal
 * page are listed; then the title, body (raw HTML, sanitized HTML and
 * paragraph text) and the {@code artInfo} elements of one article page are
 * printed.
 *
 * <p>Elements that are expected but absent from the downloaded markup are
 * reported on standard error and skipped instead of raising a
 * {@link NullPointerException}.
 */
public class jsoup {

  /** User-Agent header value sent with every request. */
  private static final String USER_AGENT =
      "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:16.0) Gecko/20100101 Firefox/16.0";

  /** Timeout passed to the jsoup connection, in milliseconds. */
  private static final int TIMEOUT_MILLIS = 2000;

  /** Line printed between the different renderings of the article body. */
  private static final String SEPARATOR = "==================================";

  /**
   * Entry point: fetches the portal page and the article page and prints the
   * extracted content.
   *
   * @param arg command-line arguments (not used)
   */
  public static void main(String... arg) {
    try {
      Document portal = fetch("http://www.sina.com");
      printLinks(portal, "syncad_0");

      Document article = fetch("http://news.sina.com.cn/c/2013-04-17/152026855555.shtml");
      System.out.println();
      printArticle(article);
    } catch (IOException e) {
      System.err.println("Failed to fetch page: " + e.getMessage());
      e.printStackTrace();
    }
  }

  /**
   * Downloads and parses the page at {@code url} using the shared User-Agent
   * header and timeout.
   *
   * @param url address of the page to download
   * @return the parsed document
   * @throws IOException if the request fails
   */
  private static Document fetch(String url) throws IOException {
    return Jsoup.connect(url)
        .header("User-Agent", USER_AGENT)
        .timeout(TIMEOUT_MILLIS)
        .get();
  }

  /**
   * Prints the {@code href} attribute of every {@code <a>} element found
   * inside the element with the given id, one per line.
   *
   * @param doc document to search
   * @param containerId id of the element whose links are listed
   */
  private static void printLinks(Document doc, String containerId) {
    Element container = doc.getElementById(containerId);
    if (container == null) {
      // getElementById yields null when no element carries the id.
      System.err.println("Element with id '" + containerId + "' not found");
      return;
    }
    for (Element link : container.getElementsByTag("a")) {
      System.out.println(link.attr("href"));
    }
  }

  /**
   * Prints the article title, the article body in three forms (raw inner
   * HTML, HTML sanitized with the basic whitelist, and the text of each
   * {@code <p>}), and finally the text of the elements whose {@code class}
   * attribute equals {@code artInfo}.
   *
   * @param article parsed article page
   */
  private static void printArticle(Document article) {
    Element title = article.getElementById("artibodyTitle");
    if (title == null) {
      System.err.println("Element with id 'artibodyTitle' not found");
    } else {
      System.out.println(title.text());
    }

    // Article body: looked up once and reused for all three renderings.
    Element body = article.getElementById("artibody");
    if (body == null) {
      System.err.println("Element with id 'artibody' not found");
    } else {
      String rawHtml = body.html();
      System.out.println(rawHtml);
      System.out.println(SEPARATOR);

      System.out.println(Jsoup.clean(rawHtml, Whitelist.basic()));
      System.out.println(SEPARATOR);

      for (Element paragraph : body.getElementsByTag("p")) {
        System.out.println(paragraph.text());
      }
    }

    Elements articleInfo = article.getElementsByAttributeValue("class", "artInfo");
    System.out.println(articleInfo.text());
  }
}
